package net.baisoft.namesearch;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

import net.baisoft.namesearch.po.Name;
import net.sourceforge.pinyin4j.PinyinHelper;

/**
 * Name searcher.
 * <p>
 * For every registered name a regular expression is pre-compiled that accepts
 * any contiguous run of the name's characters, where each character may be
 * typed either as the character itself or as any non-empty prefix of one of
 * its pinyin readings (tone removed). A search term selects a name when the
 * whole term matches that name's expression.
 *
 * @author zhaozhantao@qq.com
 *
 */
public class NameSearcher {

    /**
     * Pattern that matches no input at all. Used for entries that have no
     * searchable text so that {@link #names} and {@link #patterns} stay
     * index-aligned.
     */
    private static final Pattern NEVER_MATCH = Pattern.compile("(?!)");

    /**
     * The people being searched (snapshot taken in {@link #setPeoples(List)}).
     */
    private List<Name> names = new ArrayList<Name>();

    /**
     * Compiled expression for each person; {@code patterns.get(i)} belongs to
     * {@code names.get(i)}.
     */
    private List<Pattern> patterns = new ArrayList<Pattern>();

    /**
     * Finds every registered person whose name matches the given keyword.
     *
     * @param term
     *            the keyword; {@code null} yields an empty result
     * @return the matching people, in registration order (never {@code null})
     */
    public List<Name> searchPeoples(String term) {
        List<Name> result = new ArrayList<Name>();
        if (term == null) {
            return result;
        }
        for (int i = 0; i < patterns.size(); i++) {
            // Whole-term match, same semantics as String.matches, but without
            // recompiling the expression on every search.
            if (patterns.get(i).matcher(term).matches()) {
                result.add(names.get(i));
            }
        }
        return result;
    }

    /**
     * Sets the people to be searched and pre-compiles their expressions.
     * <p>
     * The list is copied, so later changes to {@code peoples} by the caller do
     * not affect this searcher until this method is called again.
     *
     * @param peoples
     *            the people to search; {@code null} is treated as an empty list
     */
    public void setPeoples(List<Name> peoples) {
        List<Name> snapshot = (peoples == null)
                ? new ArrayList<Name>()
                : new ArrayList<Name>(peoples);

        List<Pattern> compiled = new ArrayList<Pattern>(snapshot.size());
        for (Name name : snapshot) {
            compiled.add(buildPattern(name));
        }

        // Publish both lists only after every pattern was built, so a failure
        // above cannot leave the two lists out of step.
        this.names = snapshot;
        this.patterns = compiled;
    }

    /**
     * Builds the expression for one name: the alternation of every contiguous
     * run of its characters, e.g. for three characters A, B, C:
     * {@code A|AB|ABC|B|BC|C}, where each letter stands for that character's
     * single-character group (see {@link #buildCharRegex(char)}).
     */
    private Pattern buildPattern(Name name) {
        String value = (name == null) ? null : name.getValue();
        if (value == null || value.length() == 0) {
            // Nothing to match against; the original code failed here with a
            // StringIndexOutOfBoundsException for an empty name.
            return NEVER_MATCH;
        }

        // Each "char" here stands for one written character of the name,
        // e.g. ['赵','占','涛'].
        char[] nameChars = value.toCharArray();
        int length = nameChars.length;

        String[] charRegexs = new String[length];
        for (int i = 0; i < length; i++) {
            charRegexs[i] = buildCharRegex(nameChars[i]);
        }

        StringBuilder alternation = new StringBuilder();
        for (int start = 0; start < length; start++) {
            // Grows by one character group per step: [start..start],
            // [start..start+1], ...
            StringBuilder run = new StringBuilder();
            for (int end = start; end < length; end++) {
                run.append(charRegexs[end]);
                if (alternation.length() > 0) {
                    alternation.append('|');
                }
                alternation.append(run);
            }
        }
        return Pattern.compile(alternation.toString());
    }

    /**
     * Builds the group for a single character: the literal character itself
     * or any non-empty prefix of any of its toneless pinyin readings, e.g.
     * for '赵': {@code (赵|z|zh|zha|zhao)} (each alternative quoted).
     */
    private String buildCharRegex(char c) {
        StringBuilder group = new StringBuilder("(");
        // Quote the literal so characters such as '.', '(' or '*' in a name
        // are matched verbatim instead of being read as regex syntax.
        group.append(Pattern.quote(String.valueOf(c)));

        String[] readings = disrepeat(PinyinHelper.toHanyuPinyinStringArray(c));
        for (String reading : readings) { // every reading (polyphonic characters)
            for (int end = 1; end <= reading.length(); end++) { // every prefix
                group.append('|').append(Pattern.quote(reading.substring(0, end)));
            }
        }
        return group.append(')').toString();
    }

    /**
     * Removes the tone from each pinyin reading and drops duplicates, keeping
     * first-seen order.<br/>
     * This is a special-purpose helper for this class, not a general
     * de-duplication utility.
     *
     * @param array
     *            readings as returned by pinyin4j; may be {@code null}
     *            (pinyin4j is documented to return {@code null} for characters
     *            without a pinyin reading, e.g. Latin letters)
     * @return the distinct toneless readings; empty when {@code array} is
     *         {@code null}
     */
    private String[] disrepeat(String[] array) {
        List<String> unique = new ArrayList<String>();
        if (array != null) {
            for (String py : array) {
                if (py == null || py.length() == 0) {
                    continue;
                }
                // The readings are expected to end in a tone digit; strip it
                // only when it is really there so a reading without a tone
                // digit does not lose a letter.
                int last = py.length() - 1;
                String toneless = Character.isDigit(py.charAt(last))
                        ? py.substring(0, last)
                        : py;
                if (toneless.length() > 0 && !unique.contains(toneless)) {
                    unique.add(toneless);
                }
            }
        }
        return unique.toArray(new String[unique.size()]);
    }
}
